#!/bin/sh

#Copyright (c) 2014 Luigi Tarenga <luigi.tarenga@gmail.com>
#Distributed under the terms of a MIT license.

#run everything with the C locale
export LANG=C

#check if jobs control is required for a correct wait command:
#on HP-UX, when this script is interpreted by sh or ksh, enable it (set -m).
#shell_cmd holds the first line (the "#!" line) of this very script.
#"$0" is quoted so that a script path containing blanks is still read.
shell_cmd=$(head -n 1 "$0")
if [ "$(uname)" = "HP-UX" ] ; then
   case "$shell_cmd" in
    */sh|*/ksh) set -m ;;
   esac
fi

#every path used by this script (conf/, packages/, temp/, log/, utils/) is
#relative to the directory holding the script itself, so move there first.
#when $0 carries no directory part there is nothing to strip: stay where we
#are. when the directory cannot be entered, stop instead of running with
#relative paths that point to the wrong place.
case $0 in
 */*)
   if ! cd "${0%/*}" ; then
      echo "cannot change directory to ${0%/*}."
      exit 1
   fi
 ;;
esac

#conf/lock_path, when present, holds the directory where dex-lock is
#installed (first line of the file). default location is ../dex-lock.
[ -f conf/lock_path ] && read -r lock_path < conf/lock_path
export PATH="${lock_path:-../dex-lock}:$PATH"

if [ ! -x "${lock_path:-../dex-lock}/dex-lock" ] ; then
   echo "back-to-work need dex-lock to run. install it and configure its location."
   exit 1
fi
unset lock_path

#the only argument is the name of the package to take care of.
#without it just print the usage line and leave.
package=$1
[ -n "$package" ] || {
   echo "Usage: ${0##*/} { package-name}"
   exit 0
}

#read_conf: (re)load the parameters of $package into global variables.
#each value is asked to the package script itself with
#"packages/$package echo <name>"; an empty answer selects the default.
# reads:  package
# writes: pace          (default 4)
#         timeout       (default 30)
#         timeout2      (package parameter stop_timeout, default 30)
#         start_timeout (default 30)
#         stop_timeout  (copy of timeout2)
#         shell_cmd     (what follows "#!" on the first line of the
#                        package script)
read_conf () {
   pace=$("packages/$package" echo pace)
   pace=${pace:-4}
   timeout=$("packages/$package" echo timeout)
   timeout=${timeout:-30}
   timeout2=$("packages/$package" echo stop_timeout)
   timeout2=${timeout2:-30}
   start_timeout=$("packages/$package" echo start_timeout)
   start_timeout=${start_timeout:-30}
   stop_timeout=$timeout2

   shell_cmd=$(head -n 1 "packages/$package")
   shell_cmd=${shell_cmd#\#!}
   #"#! /bin/sh" is a valid first line too: drop the blanks that follow
   #"#!". _activate builds the pattern "$shell_cmd packages/$package start"
   #from this value and a leading blank would end up inside that pattern.
   shell_cmd=${shell_cmd#"${shell_cmd%%[! ]*}"}
}

#quit: clean up and terminate the daemon.
# $1: exit status of the script
#removes the two state files and the lock directory of $package, calls
#dex-release for the package and then exits with the requested status.
quit () {
   _lockdir=temp/lockdir_$package
   rm -f $_lockdir/role $_lockdir/pkg_state
   rmdir $_lockdir
   dex-release $package
   exit $1
}

#do not immediately exit on kill! we must stop the package before quitting!
#HUP, INT and TERM only report the signal and raise must_halt: the main
#loop takes care of the halt sequence. USR1 re-reads the package parameters.
for _sig in HUP INT TERM ; do
   trap "echo \"\$(date) received signal SIG$_sig. starting halt sequence.\" ; must_halt=\"true\"" $_sig
done
unset _sig
trap 'echo "$(date) received signal SIGUSR1. re-reading package parameters." ; read_conf' USR1

#the lock directory of the package is also the single instance guard:
#when it cannot be created (mkdir errors are discarded) the script assumes
#another back-to-work is already serving this package and gives up.
mkdir temp/lockdir_$package 2> /dev/null || {
   echo "temp/lockdir_$1 already exist. back-to-work is already running for package $1."
   exit 1
}

#load the package parameters and start as a stand-by node with no pending
#halt/fail request. role and package state are published in the lock
#directory (the package state is empty while in stand-by).
read_conf
role="stand-by"
must_halt="false"
must_fail="false"
echo "$role" > "temp/lockdir_$package/role"
echo ""      > "temp/lockdir_$package/pkg_state"
#avoid using built-in sleep (see mksh) in background.
#it creates a second back-to-work process. ugly.
#that is why the path of the external command is looked up with which
#(command -v would answer with the name of the built-in).
sleep_cmd=$(which sleep 2> /dev/null)
#which may be missing or may print a diagnostic instead of a path. an empty
#or invalid sleep_cmd would make the main loop run without any pause, so
#fall back to a plain "sleep" resolved through PATH.
[ -x "$sleep_cmd" ] || sleep_cmd="sleep"

dex-init "$package" "$timeout" "$timeout2"

###############################################
#these functions are here to speed up the parser

#_starting: one tick of the "starting" state.
# reads:  start_pid, package, pace
# writes: start_timeout, pkg_state and status_pid or stop_pid
# - start script ended with 0: launch the status monitor, go to "running".
# - start script ended with error: launch the stop script, go to "failing".
# - start script still running: consume one pace of start_timeout. when no
#   more than one pace is left kill it, launch the stop script and go to
#   "failing".
_starting () {
   if ! kill -0 "$start_pid" 2> /dev/null ; then
      #the start script is gone: collect its exit status
      wait "$start_pid"
      if [ $? -eq 0 ] ; then
         "packages/$package" status >> "log/$package.log" 2>&1 & status_pid=$!
         pkg_state="running"
      else
         echo "$(date) start script exited with error."
         "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
         pkg_state="failing"
      fi
      #a start script that is over cannot time out any more. checking the
      #timeout here too would, with start_timeout <= pace, launch a second
      #concurrent stop script or fail a package that started fine.
      return
   fi

   start_timeout=$((start_timeout-pace))
   if [ "$start_timeout" -le "$pace" ] ; then
      echo "$(date) timeout waiting for package to start."
      kill "$start_pid" 2> /dev/null ; sleep 1
      "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
      pkg_state="failing"
   fi
}

#_running: one tick of the "running" state.
# reads:  status_pid, package, pace, timeout
# writes: healthy_timeout, pkg_state and status_pid or stop_pid
#when the status monitor has ended its exit status decides what to do:
# 0 up: start a new monitor and restore the full healthy_timeout.
# 1 down: launch the stop script, go to "failing".
# 2 degraded: start a new monitor and consume one pace of healthy_timeout.
# other: critical, launch the stop script, go to "halting".
#a monitor that is still running counts as degraded.
#when no more than one pace of healthy_timeout is left the package is
#stopped and goes to "failing".
_running () {
   if ! kill -0 "$status_pid" 2> /dev/null ; then
      #status function returns: 0 up, 1 down, 2 degraded, 3 critical
      wait "$status_pid"
      case $? in
       0) #monitor exited with 0. fine. start a new one.
         "packages/$package" status >> "log/$package.log" 2>&1 & status_pid=$!
         if [ "$healthy_timeout" -ne "$timeout" ] ; then
            echo "$(date) package status is healthy again."
            healthy_timeout=$timeout
         fi
       ;;
       1) #package is down... clean stop it anyway.
         echo "$(date) package status is unexpectedly down!"
         "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
         pkg_state="failing"
       ;;
       2) #package is degraded... monitor and wait for self healing.
         echo "$(date) package status degraded..."
         "packages/$package" status >> "log/$package.log" 2>&1 & status_pid=$!
         healthy_timeout=$((healthy_timeout-pace))
       ;;
       *) #a critical error has been detected. stop immediately.
         echo "$(date) package status is critical! manual intervention required!"
         "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
         pkg_state="halting"
       ;;
      esac
   else
      #monitor is taking some time... assume degraded
      healthy_timeout=$((healthy_timeout-pace))
   fi

   #the timeout applies only while the state is still "running". when a
   #stop script has just been launched above, a second one must not be
   #started and "halting" must not be turned into "failing" (this can
   #happen when pace has been raised by a re-read of the parameters).
   if [ "$pkg_state" = "running" ] && [ "$healthy_timeout" -le "$pace" ] ; then
      echo "$(date) error: timeout waiting for package good health."
      "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
      pkg_state="failing"
   fi
}

#_halting: one tick of the "halting" and "failing" states.
# reads:  stop_pid, pace
# writes: stop_timeout, pkg_state
# - stop script ended with 0: "failing" becomes "failed", "halting"
#   becomes "halted".
# - stop script ended with error: go to "halted" whatever the state was.
# - stop script still running: consume one pace of stop_timeout. when no
#   more than one pace is left kill it and go to "halted".
_halting () {
   if ! kill -0 "$stop_pid" 2> /dev/null ; then
      #the stop script is gone: collect its exit status
      wait "$stop_pid"
      if [ $? -eq 0 ] ; then
         case $pkg_state in
          failing) pkg_state="failed" ;;
          halting) pkg_state="halted" ;;
         esac
      else
         #if stop script returns error it's safer to quit
         echo "$(date) stop script exited with error."
         pkg_state="halted"
      fi
      #a stop script that is over cannot time out any more. checking the
      #timeout here too would, with stop_timeout <= pace, signal a dead pid
      #and turn a clean "failed" into "halted".
      return
   fi

   stop_timeout=$((stop_timeout-pace))
   if [ "$stop_timeout" -le "$pace" ] ; then
      echo "$(date) timeout waiting for package to stop."
      kill "$stop_pid"
      pkg_state="halted"
   fi
}

#_halted: tick of the "halted" and "failed" states.
# - "halted": report it and leave the daemon through quit 0.
# - any other state: report it, call dex-release for the package, pause
#   for one pace and go back to the stand-by role. the pending fail request
#   is cleared and the state files are rewritten (empty package state).
_halted () {
   case $pkg_state in
    halted)
      echo "$(date) package $pkg_state. exit."
      quit 0
    ;;
   esac

   echo "$(date) package $pkg_state. restarting as stand-by."
   dex-release $package $timeout $timeout2
   sleep $pace
   role="stand-by"
   must_fail="false"
   echo $role > temp/lockdir_$package/role
   echo ""    > temp/lockdir_$package/pkg_state
}

#_unknown: one tick of the "unknown" state (entered when the node becomes
#active, while the first status monitor is running).
# reads:  status_pid, package, pace, timeout
# writes: healthy_timeout, pkg_state and start_pid or stop_pid
#when the status monitor has ended its exit status decides what to do:
# 0 or 2: the package is not down, launch the stop script, go to "failing".
# 1 down: launch the start script, go to "starting" with a full
#         healthy_timeout.
# other: critical, launch the stop script, go to "halting".
#while the monitor is still running one pace of healthy_timeout is consumed;
#when no more than one pace is left the package is stopped and goes to
#"failing".
_unknown () {
   if ! kill -0 "$status_pid" 2> /dev/null ; then
      #status function returns: 0 up, 1 down, 2 degraded, 3 critical
      wait "$status_pid"
      case $? in
       0|2) #package is not down... this should never happen :(.
         echo "$(date) package status is not down."
         "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
         pkg_state="failing"
       ;;

       1) #package is down... nice we can start it.
         "packages/$package" start >> "log/$package.log" 2>&1 & start_pid=$!
         pkg_state="starting"
         healthy_timeout=$timeout
       ;;

       *) #a critical error has been detected. stop immediately.
         echo "$(date) package status is critical! manual intervention required!"
         "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
         pkg_state="halting"
       ;;
      esac
   else
      #monitor is taking some time... let's be patient and wait...
      healthy_timeout=$((healthy_timeout-pace))
   fi

   #the timeout applies only while the state is still "unknown". once a
   #start or stop script has been launched above, launching a stop script
   #here too would run it alongside the other one and would turn "halting"
   #or "starting" into "failing".
   if [ "$pkg_state" = "unknown" ] && [ "$healthy_timeout" -le "$pace" ] ; then
      echo "$(date) error: timeout waiting for package status. "
      "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
      pkg_state="failing"
   fi
}

#_activate: switch this node from stand-by to active (called by the main
#loop when dex-lock succeeds).
# reads:  package, timeout, timeout2, shell_cmd
# writes: role, pkg_state, healthy_timeout, start_timeout, stop_timeout,
#         status_pid
#the timeouts are reset (start_timeout is asked again to the package
#script, default 30), the new role and the "unknown" state are published in
#the lock directory, then the first status monitor is launched.
#if utils/pkill reports a start or a stop script of the package already
#running the daemon leaves through quit 1.
_activate () {
   echo "$(date) got lock. switch to active node."
   role="active"
   pkg_state="unknown"
   healthy_timeout=$timeout
   stop_timeout=$timeout2
   start_timeout=$(packages/$package echo start_timeout)
   start_timeout=${start_timeout:-30}
   echo $role      > temp/lockdir_$package/role
   echo $pkg_state > temp/lockdir_$package/pkg_state

   #same check for both scripts, start first
   for _script in start stop ; do
      if utils/pkill -0 -x -f "$shell_cmd packages/$package $_script" ; then
         echo "$(date) error: $_script script already running."
         quit 1
      fi
   done
   unset _script

   echo "$(date) package state initialized to $pkg_state."
   packages/$package status >> log/$package.log 2>&1 & status_pid=$!
}

#_must_halt_fail: serve a pending halt or fail request.
# reads:  must_halt, must_fail, pkg_state, start_pid, package
# writes: stop_pid, pkg_state
# returns 1 when there is no request (the caller then runs the normal tick
#           of the state)
#         0 when the stop script has been launched; the state becomes
#           "halting" for a halt request, "failing" for a fail request
#           (halt wins when both are pending).
_must_halt_fail () {
   if [ "$must_halt" != "true" ] && [ "$must_fail" != "true" ] ; then
      return 1
   fi

   #the start script can be alive only in the "starting" state. in any
   #other state start_pid is a leftover of an earlier start and that pid
   #may now belong to an unrelated process: do not signal it.
   if [ "$pkg_state" = "starting" ] && [ -n "$start_pid" ] ; then
      kill "$start_pid" 2> /dev/null && sleep 1
   fi

   "packages/$package" stop >> "log/$package.log" 2>&1 & stop_pid=$!
   [ "$must_fail" = "true" ] && pkg_state="failing"
   [ "$must_halt" = "true" ] && pkg_state="halting"
   return 0
}

###############################################

#main loop. it never returns: the script ends only through quit.
#each iteration is paced by a background "$sleep_cmd $pace". the very first
#wait needs a background pid too: the start-up message is printed by a
#background printf and its pid is used for that.
printf "$(date) starting as stand-by node.\n" & sleep_pid=$!
while true ; do
   #wait for the previous background sleep (or the printf above), then
   #start the next sleep right away and do the work of this tick.
   #NOTE(review): presumably wait is preferred to a foreground sleep so that
   #a trapped signal does not have to wait for a full pace - confirm.
   wait $sleep_pid ; $sleep_cmd $pace & sleep_pid=$!
   case $role in
    stand-by)
      #a halt request in stand-by has nothing to stop: just leave.
      [ "$must_halt" = "true" ] && quit 0
      #try to get the lock. when dex-lock succeeds become the active node.
      dex-lock $package $timeout $timeout2 $pace && _activate
    ;;

    active)
      #dex-lock is called again at every tick. when it fails the lock is
      #considered lost: report it once and raise the fail request.
      if ! dex-lock $package $timeout $timeout2 $pace ; then
         [ "$must_fail" = "false" ] && echo "$(date) error: lock lost. failing package."
         must_fail="true"
      fi

      old_state=$pkg_state
      #using functions to speed up the parser (I hope :P)
      #pending halt/fail requests are served first in the starting, running
      #and unknown states. the stop sequence states are never interrupted.
      case $pkg_state in
       starting) _must_halt_fail || _starting ;;
       running)  _must_halt_fail || _running ;;
       halting|failing)             _halting ;;
       halted|failed)               _halted ;;
       unknown)  _must_halt_fail || _unknown ;;
      esac # package_state

      #report a state change and publish role and state in the lock directory
      if [ "$old_state" != "$pkg_state" ] ; then
         echo "$(date) package state changes to $pkg_state."
         echo $role      > temp/lockdir_$package/role
         echo $pkg_state > temp/lockdir_$package/pkg_state
      fi
    ;;
   esac # role
done
